In [1]:
from nltk.corpus.reader import TaggedCorpusReader
from nltk.tag import AffixTagger
import pickle

In [2]:
# Training data: read the tagged corpus file 'latin_training_set.pos'
# from the current working directory.
reader = TaggedCorpusReader(root='.', fileids='latin_training_set.pos')
# Tagged sentences that every tagger in this notebook is trained on.
train_sents = reader.tagged_sents()

2-char prefix tagger


In [3]:
# A positive affix_length makes AffixTagger key on word prefixes
# (here: the first 2 characters).
two_prefix_tagger = AffixTagger(
    train=train_sents,
    affix_length=2,
)

In [4]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
two_prefix_tagger.evaluate(gold=train_sents)


Out[4]:
0.11491635775172647

3-char prefix tagger


In [5]:
# A positive affix_length makes AffixTagger key on word prefixes
# (here: the first 3 characters).
three_prefix_tagger = AffixTagger(
    train=train_sents,
    affix_length=3,
)

In [6]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
three_prefix_tagger.evaluate(gold=train_sents)


Out[6]:
0.15512861524565794

4-char prefix tagger


In [7]:
# A positive affix_length makes AffixTagger key on word prefixes
# (here: the first 4 characters).
four_prefix_tagger = AffixTagger(
    train=train_sents,
    affix_length=4,
)

In [8]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
four_prefix_tagger.evaluate(gold=train_sents)


Out[8]:
0.16120655589635513

2-char suffix tagger


In [9]:
# A negative affix_length makes AffixTagger key on word suffixes
# (here: the last 2 characters).
suffix_tagger = AffixTagger(
    train=train_sents,
    affix_length=-2,
)

In [10]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
suffix_tagger.evaluate(gold=train_sents)


Out[10]:
0.22117682479348175

3-char suffix tagger


In [11]:
# A negative affix_length makes AffixTagger key on word suffixes
# (here: the last 3 characters).
three_suffix_tagger = AffixTagger(
    train=train_sents,
    affix_length=-3,
)

In [12]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
three_suffix_tagger.evaluate(gold=train_sents)


Out[12]:
0.2615772538245865

4-char suffix tagger


In [13]:
# A negative affix_length makes AffixTagger key on word suffixes
# (here: the last 4 characters).
four_suffix_tagger = AffixTagger(
    train=train_sents,
    affix_length=-4,
)

In [14]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
four_suffix_tagger.evaluate(gold=train_sents)


Out[14]:
0.2749374329638899

5-char suffix tagger


In [15]:
# A negative affix_length makes AffixTagger key on word suffixes
# (here: the last 5 characters).
five_suffix_tagger = AffixTagger(
    train=train_sents,
    affix_length=-5,
)

In [16]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
five_suffix_tagger.evaluate(gold=train_sents)


Out[16]:
0.2376041999887097

6-char suffix tagger


In [17]:
# A negative affix_length makes AffixTagger key on word suffixes
# (here: the last 6 characters).
six_suffix_tagger = AffixTagger(
    train=train_sents,
    affix_length=-6,
)

In [18]:
# Scored against the same sentences the tagger was trained on, so this is
# a training-set figure rather than a held-out estimate.
six_suffix_tagger.evaluate(gold=train_sents)


Out[18]:
0.1721957736672751